# -*- coding:UTF-8 -*-
# 开发人员: limenghui
# 开发时间：2024-09-21 下午11:25
# 文件名称：
# 开发工具：pycharm


from time import sleep
import requests
import io
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

# HTTP request headers imitating an AJAX call from a desktop Chrome browser.
# NOTE(review): not referenced anywhere in the visible part of this script.
headers = {}
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
headers['X-Requested-With'] = 'XMLHttpRequest'
headers['Content-Type'] = 'application/json;charset=utf-8'


# Browser launch options shared by every WebDriver session in this script.
options = webdriver.EdgeOptions()
# Remove the 'enable-automation' switch from the browser's launch switches.
options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Suppress the "save password" prompt (currently disabled):
# prefs = {'credentials_enable_service':False,'profile.password_manager_enabled':False}
# Anti-crawler-detection tweak: turn off the AutomationControlled blink feature.
options.add_argument('disable-blink-features=AutomationControlled')

options.add_experimental_option("prefs", {
    # Directory that browser downloads are written to.
    "download.default_directory": r"C:\Users\limenghui\PycharmProjects\CITIC_BANK\WORK",
    # Save without showing a "Save as" dialog.
    "download.prompt_for_download": False,
    # Download PDFs instead of rendering them in the built-in viewer.
    # The previous key, "plugins.always_open_plugin_dir", is not a known
    # Chromium preference, so PDF links were opened in the viewer rather
    # than saved to the download directory.
    "plugins.always_open_pdf_externally": True,
    "profile.default_content_settings.popups": 0,
})


# Step 1: walk the announcement listing pages of a PBC branch site and collect
# the links whose text mentions "国库现金管理" into gk_html.
driver = webdriver.Edge(options=options)
# url = r'http://nanchang.pbc.gov.cn/nanchang/132354/index.html' # Jiangxi (Nanchang) PBC branch
# url = r'http://chongqing.pbc.gov.cn/chongqing/107662/index.html' # Chongqing PBC branch
url = 'http://xian.pbc.gov.cn/xian/129420/index.html'  # Xi'an PBC branch
page_count = 2  # number of listing pages to scan
gk_html = []
try:
    driver.get(url)
    driver.implicitly_wait(10)
    for page in range(page_count):
        # Alternative container id, used for the Jiangxi / Chongqing sites:
        # rows = driver.find_elements(By.XPATH, '//*[@id="r_con"]//a')
        rows = driver.find_elements(By.XPATH, '//*[@id="content_right"]//a')
        gk_html.extend(
            row.get_attribute('href')
            for row in rows
            if '国库现金管理' in row.text
        )
        if page == page_count - 1:
            # Last requested page has been scanned; do not navigate further.
            break
        # find_elements returns an empty list instead of raising, so a
        # listing with fewer pages than page_count ends the loop cleanly
        # and keeps the links gathered so far.
        next_links = driver.find_elements(By.LINK_TEXT, '下一页')
        if not next_links:
            break
        next_links[0].click()
finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window.
    driver.quit()

# Step 2: open every announcement page collected in gk_html and record the
# href of the first link found under the element with id "zoom".
pdf_url_list = []
if gk_html:
    # A single browser session is reused for all pages and always shut down,
    # instead of starting one browser per URL and closing only the last one.
    driver = webdriver.Edge(options=options)
    try:
        for url in gk_html:
            driver.get(url)
            attachment_links = driver.find_elements(By.XPATH, '//*[@id="zoom"]/p//a')
            if not attachment_links:
                # A page without an attachment link is skipped rather than
                # aborting the whole run.
                print(f'no attachment link found on {url}')
                continue
            pdf_url = attachment_links[0].get_attribute('href')
            if not pdf_url:
                continue
            pdf_url_list.append(pdf_url)
            print(pdf_url)
    finally:
        driver.quit()
print(pdf_url_list)

# Step 3: open each collected PDF link in the browser and pause 3 seconds
# after each one, presumably so the download configured through the prefs on
# `options` can finish -- TODO confirm 3 s is enough for large files.
if pdf_url_list:
    # `options` is an EdgeOptions object, so it must be paired with the Edge
    # driver (it was previously handed to webdriver.Chrome).
    driver1 = webdriver.Edge(options=options)
    try:
        for url in pdf_url_list:
            print(url)
            driver1.get(url)
            sleep(3)
    finally:
        # Always end the session, even if one of the URLs fails to load.
        driver1.quit()